Last updated on 2025-10-02.
Show code
library (dplyr)
library (dbplyr)
library (ggplot2)
for (i in list.files (here:: here ("R" ), full.names = TRUE )) {
source (i)
}
# SAMPLE NAME
## specify sample name
sample.names <- c (
# dmel
# "dmel-head",
# "dmel-body",
# "dmel-heart"
# mmus
"mmus-brain_stem" ,
"mmus-cerebellum" ,
"mmus-hypothalamus" ,
"mmus-heart"
# # panu
# "panu-brain_stem",
# "panu-cerebellum",
# "panu-hypothalamus",
# "panu-heart",
# "panu-scn"
)
# sample.cycles <- c("LD", "DD")
## SPECIFY THE DATASET TO BUILD GCN WITH
ref.sample <- "mmus-brain_stem"
writeLines (
glue:: glue ("Reference tissue: {ref.sample}" )
)
Reference tissue: mmus-brain_stem
Prep data
Expected format: one row per gene, one col per sample
Structure of reference tissue data:
ONE-SHOT (make_modules)
Make modules in one call
Show code
mods <- timecourseRnaseq:: make_modules (
data = tidydata.db[[ref.sample]],
log2 = TRUE ,
id_column = "gene_name" ,
min_expression = NULL , # automatically estimated
min_timepoints = NULL , # automatically estimated
method = "wgcna" ,
qc = TRUE ,
sim_method = "kendall" ,
soft_power = NULL , # automatically estimated
min_module_size = 50 ,
max_modules = 16 ,
merge_cutoff_similarity = 0.9 ,
plot_network = FALSE ,
# plot_network_min_edge = 0.5, # used when `plot_network == TRUE`
tidy_modules = TRUE
)
---------------------------------------------------
1. Log2-transform and subset
---------------------------------------------------
Applying log2-transformation...Done.
Estimated min_expression = 8
Estimated min_timepoints = 6
Subsetting data...Done.
[ NOTE ]: After subsetting, 8893 of 17406 rows remain.
Flagging genes and samples with too many missing values...
..step 1
All okay!Visualizing the log-transformed data
---------------------------------------------------
2. Calculate similarity of expression
---------------------------------------------------
Running gene-gene similarity...Done.
---------------------------------------------------
3. Create adjacency matrix
---------------------------------------------------
Performing network topology analysis to pick
soft-thresholding power...
Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
1 1 0.7240 1.9500 0.654 4510 4900.0 6020
2 2 0.5090 0.5730 0.389 2970 3230.0 4790
3 3 0.0298 0.0544 0.363 2170 2280.0 4040
4 4 0.2950 -0.1720 0.710 1690 1680.0 3510
5 5 0.6060 -0.3070 0.785 1360 1280.0 3110
6 6 0.7420 -0.3970 0.817 1130 995.0 2800
7 7 0.8110 -0.4740 0.818 951 790.0 2540
8 8 0.8540 -0.5280 0.841 815 634.0 2330
9 9 0.8860 -0.5790 0.866 708 516.0 2140
10 10 0.8960 -0.6210 0.868 621 424.0 1980
11 12 0.9040 -0.6890 0.877 489 294.0 1730
12 15 0.9040 -0.7770 0.886 359 182.0 1440
13 18 0.8990 -0.8420 0.895 275 118.0 1220
14 21 0.8930 -0.8910 0.899 216 80.2 1050
Plotting resutls from the network topology analysis...
Done.
[ NOTE, FIGURE ]: Red horizontal line indicates a signed R^2 of 0.9
Setting soft-thresholding power to: 8.
Power-transforming the gene-gene similarity matrix...Done.
---------------------------------------------------
4. Convert into topological overlap matrix (dissTOM)
---------------------------------------------------
Creating dissTOM...Done.
Performing hierarchical clustering on dissTOM...Done.
---------------------------------------------------
5. Identify modules (clusters)
---------------------------------------------------
Merging modules that have a correlation ≥ 0.9 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.
Module (cluster) size:
mergedColors
antiquewhite4 bisque4 blue coral1 coral2
215 669 1938 60 287
cyan darkmagenta darkolivegreen darkorange2 darkred
337 117 435 96 157
darkseagreen4 darkslateblue floralwhite grey60 honeydew1
61 87 97 172 388
ivory lavenderblush3 lightcyan lightcyan1 lightgreen
98 155 174 98 165
magenta mediumorchid mediumpurple3 navajowhite2 orange
202 55 105 76 154
orangered4 paleturquoise palevioletred3 pink plum1
108 123 301 623 110
purple red salmon skyblue skyblue2
199 234 186 127 54
steelblue white yellow4 yellowgreen
123 146 50 111
Merging modules that have a correlation ≥ 0.85 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.
Module (cluster) size:
mergedColors
bisque4 coral1 darkgrey darkmagenta darkorange2
1397 440 3195 117 194
darkred darkseagreen4 grey60 ivory magenta
157 61 172 672 325
mediumorchid mediumpurple3 navajowhite2 orange palevioletred3
163 105 76 154 736
plum1 saddlebrown skyblue skyblue2 white
110 287 282 54 146
yellow4
50
Merging modules that have a correlation ≥ 0.8 ...Done.
[ NOTE, FIGURE ] Plotting identified clusters before and after merging.
Module (cluster) size:
mergedColors
bisque4 blue coral1 darkmagenta darkorange2
2462 3195 440 796 194
darkseagreen4 ivory magenta mediumorchid mediumpurple3
61 748 325 163 105
orange skyblue2 white yellow4
154 54 146 50
Cutoff used: 0.8
Number of modules identified: 14
Calculating module-module similarity based
on module-eigengene-expression...Done.
Tidying module names...Done.
Plotting adjacency matrix for module-module similarity...
---------------------------------------------------
6. Tidy modules (clusters)
---------------------------------------------------
Modify network plot
Internal function; use ::: to call
Show code
timecourseRnaseq::: plot_adj_as_network (
matrix = mods[["adj_matrix_ME" ]][["ME" ]],
# layout = igraph::layout.sugiyama, # default view
layout = igraph:: layout_in_circle, # changed
min_edge = 0.6 ,
node_label_size = 1.2 ,
node_size = 30 ,
edge_size = 3 ,
node_frame_col = "grey20" ,
node_fill_col = "grey80" ,
vertex.frame.width = 3
)
Visualizing a simplified representation of the circadian GCN
Annotate the network
Identify rhythmic modules
Show code
# Obtain a list of genes in each module
l_module_genes <- mods[["module_genes" ]] |>
arrange (module_identity) |>
group_split (module_identity) |>
purrr:: map (
~ .x |> pull (gene_name)
) |>
setNames (unique (mods[["module_genes" ]]$ module_identity))
# Load results of rhythmicity analyses
db_rhy <- purrr:: map (
sample.names,
~ load_rhy_genes (
sample = .x
)
) |>
setNames (sample.names)
# Set your p-value of choice
###- ### - ### - ### - ### - ### - ### - ### -
# col_pval = "BH.Q"
col_pval = "default.pvalue"
# col_pval = "raw.pvalue"
###- ### - ### - ### - ### - ### - ### - ### -
# Obtain a list of rhythmic genes in each tissue
l_rhy_genes <- purrr:: map (
sample.names,
~ db_rhy[[.x]] |>
purrr:: map (
~ .x |>
filter (
if_all (
all_of (col_pval),
~ .x < 0.05
)
) |>
filter (
ID %in% unlist (l_module_genes)
) |>
pull (1 ) |>
unique ()
) |>
purrr:: compact ()
) |>
setNames (sample.names)
Modules vs. Rhythmic genes
Show code
# LIST ONE - WGCNA modules
list1 <- l_module_genes
sapply (list1, length) |> print ()
C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14
154 163 3195 105 748 146 54 796 2462 440 194 61 50 325
Show code
trash <- purrr:: map (
sample.names,
function (x) {
cat ("Tissue:" , x, " \n " )
## LIST TWO - rhythmic genes
list2 <- l_rhy_genes[[x]]
sapply (list2, length) |> print ()
## CHECK FOR OVERLAP
# define size of genome
size = length (unique (c (unlist (list1), unlist (list2))))
# make a GOM object
gom.1 v2 <- GeneOverlap:: newGOM (
list2,
list1,
genome.size = size
)
GeneOverlap:: drawHeatmap (
gom.1 v2,
adj.p = TRUE ,
cutoff = 0.05 ,
what = "odds.ratio" ,
# what="Jaccard",
log.scale = T,
note.col = "black" ,
grid.col = "Oranges"
)
gom.1 v2
}
) |>
setNames (sample.names)
Tissue: mmus-brain_stem
ARS empJTK GeneCycle JTK meta2d RAIN
1010 361 368 198 783 687
Tissue: mmus-cerebellum
ARS empJTK GeneCycle JTK meta2d RAIN
2663 701 641 241 1952 593
Tissue: mmus-hypothalamus
ARS empJTK GeneCycle JTK meta2d RAIN
2042 559 481 223 1294 609
Tissue: mmus-heart
ARS empJTK GeneCycle JTK meta2d RAIN
2550 987 785 532 2251 1316
Module preservation
Prep data
Show code
# tidydata.db |>
# purrr::map_int(
# ~ nrow(.x)
# )
# We work with two sets:
nSets = 2 ;
# Find the common set of genes across all samples
common_genes <- tidydata.db |>
purrr:: map (
~ .x |>
pull (gene_name) |>
unique ()
) |>
purrr:: reduce (
intersect
) |>
intersect (
mods[["module_genes" ]]$ gene_name
)
# filter to keep only genes used in creating REF network
samples.list <- tidydata.db |>
purrr:: map (
~ .x |>
filter (
gene_name %in% common_genes
)
)
test_samples <- setdiff (sample.names, ref.sample)
# OPTION 1: Read previous run from memory
l_mp <- readRDS (
here:: here (
glue:: glue (
"result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
)
)
)
# # OPTION 2: Run module preservation for each tissue pair (Takes time)
# l_mp <- purrr::map(
# test_samples,
# function (x) {
# multiExpr <- prep_data_module_preservation(
# data = samples.list,
# ref = ref.sample,
# test = x
# )
#
# # CHECKS
# #---#---#---#---#---#---#---#---#---#---#---#---
# # Check that the data has the correct format for many functions
# # operating on multiple sets:
# exprSize = WGCNA::checkSets(multiExpr)
# # Check that all genes and samples have sufficiently low numbers of
# # missing values.
# gsg = WGCNA::goodSamplesGenesMS(multiExpr, verbose = 1);
# cat("All samples okay?\n", gsg$allOK, "\n")
# #---#---#---#---#---#---#---#---#---#---#---#---
#
# # prep data
# #---#---#---#---#---#---#---#---#---#---#---#---
# # Expression data
# multiExpr_1 = list(
# ref = list(data = multiExpr[[1]]$data),
# test = list(data = multiExpr[[2]]$data)
# )
# ## filter to keep only the genes that we are working with
# mod.identity <- mods[["module_genes"]] |>
# select(
# gene_name,
# old_labels
# ) |>
# filter(
# gene_name %in% common_genes
# ) |>
# # !!this step is necessary!! #
# arrange(gene_name)
# ## specify the module identity of the genes
# moduleColors <- mod.identity |> pull(old_labels)
# multiColor = list(ref = moduleColors)
#
# # Run module preservation
# #---#---#---#---#---#---#---#---#---#---#---#---
# mp = WGCNA::modulePreservation(
# multiExpr_1,
# multiColor,
# referenceNetworks = 1,
# nPermutations = 100,
# calculateQvalue = FALSE,
# quickCor = 0,
# verbose = 1
# )
#
# mp
# }
# )
# saveRDS(
# l_mp,
# here::here(
# glue::glue(
# "result/tissue-comparison/ref_{ref.sample}_module-preservation.rds"
# )
# )
# )
Show code
l_zsummary <- purrr:: map2 (
l_mp,
test_samples,
function (mp, y) {
ref = 1
test = 2
statsObs = cbind (
mp$ quality$ observed[[ref]][[test]],
mp$ preservation$ observed[[ref]][[test]]
)
statsZ = cbind (
mp$ quality$ Z[[ref]][[test]][,- 1 ],
mp$ preservation$ Z[[ref]][[test]][,- 1 ]
)
# Compare preservation to quality:
z.stats <- cbind (
statsObs[, c ("moduleSize" , "medianRank.pres" , "medianRank.qual" )],
signif (statsZ[, c ("Zsummary.pres" , "Zsummary.qual" )], 2 )
)
mains = c (
"Preservation Median rank" ,
"Preservation Zsummary"
)
pd <- position_dodge (0.5 )
# Plot
out <- z.stats |>
tibble:: rownames_to_column ("old_labels" ) |>
left_join (
distinct (mods[["module_genes" ]][- 1 ]),
by= "old_labels"
) |>
select (
module_name = module_identity,
module_size = moduleSize,
everything ()
) |>
filter (
! is.na (module_name)
) |>
mutate (
module_name = factor (
module_name,
levels = unique (mods[["module_genes" ]]$ module_identity)
)
)
## PLOT PRESERVATION Z-SUMMARY
p <- out |>
ggplot (aes (x= log10 (module_size), y= Zsummary.pres)) +
geom_hline (yintercept = c (2 ,10 ), col= "darkred" , alpha= 0.5 ) +
geom_point (
alpha= 0.5 ,
size= 20 ,
col= "lightgrey" ,
position = pd
) +
geom_text (
aes (label= module_name),
check_overlap = TRUE ,
size = 5
) +
theme_bw (20 ) +
scale_x_continuous (limits = c (0 ,max (log10 (1000 ))+ 0.5 ),
breaks = c (0 ,1 ,2 ,3 ),
labels = c ("0" ,"10" ,"100" ,"1000" )) +
xlab ("module size (genes)" ) +
ylab (mains[2 ]) +
ggtitle (paste0 ("Test: " , y))
print (p)
out
}
) |>
setNames (
test_samples
)
Summary
Show code
purrr:: map_dfr (
test_samples,
~ l_zsummary[[.x]] |>
select (
modules = module_name,
zsumm = Zsummary.pres
) |>
mutate (
# ref = which.sample,
tissue = .x,
.before = 1
)
) |>
as_tibble () |>
filter (
zsumm > 10
) |>
tidyr:: pivot_wider (
names_from = modules,
values_from = zsumm
) |>
mutate (
across (
! tissue,
~ if_else (
is.na (.x),
"-" ,
round (.x, 2 ) |> as.character ()
)
)
) |>
select (
tissue,
any_of (
paste0 ("C" , 1 : 20 )
)
) |>
kableExtra:: kable ()
mmus-cerebellum
-
18
-
18
-
mmus-hypothalamus
12
20
12
23
15
Network statistics
From WGCNA-tutorial
Intramodular connectivity
“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as”degree”.) The function intramodularConnectivity computes:
the whole network connectivity kTotal ,
the within (intra)module connectivity kWithin ,
the extra-modular connectivity kOut =kTotal-kWithin, and
the difference of the intra- and extra-modular connectivities kDiff = kIn - kOut = 2*kIN-kTotal
Show code
# From what I can tell, colorh1 in the tutorial refers to moduleColors
colorh1 <- mods[["modules" ]]$ colors
adj_matrix <- mods[["adj_matrix" ]]
# Calculate the connectivities of the genes
Alldegrees1 = WGCNA:: intramodularConnectivity (
adjMat = adj_matrix,
colors = colorh1
) |>
mutate (
gene_name = rownames (adj_matrix),
across (
matches ("^k" ),
~ round (.x, 2 )
)
)
Plotting the mean (± 95% CI) connectivity of genes in different modules
Show code
pd <- position_dodge (0.1 )
# which_var <- "kTotal"
which_var <- c ("kTotal" , "kWithin" , "kOut" , "kDiff" )
Alldegrees1 |>
# rownames_to_column("gene_name") %>%
left_join (
mods[["module_genes" ]],
join_by (gene_name)
) |>
# PLOT FROM RAW DATA
mutate (
module_identity = factor (
module_identity,
levels = paste0 (
"C" ,
sort (
unique (mods[["module_genes" ]]$ module_identity) |>
stringr:: str_replace ("C" , "" ) |>
as.integer ()
)
) |> rev ()
)
) |>
summarySE (
measurevar = which_var,
groupvars = "module_identity"
) |>
mutate (
type = factor (
type,
levels = which_var
)
) |>
# Plot
ggplot (
aes (
y = module_identity,
x = mean,
group = interaction (module_identity, type)
)
) +
geom_vline (xintercept = 0 , col = "maroon" , alpha = 0.7 , lwd = 1.2 ) +
labs (
y = "" ,
x = glue:: glue (
"Connectivity"
),
title = ""
) +
## Add error bar here
geom_errorbar (
aes (xmin = mean- ci, xmax = mean+ ci),
width = 0.3 ,
position= pd,
lwd = 1.3 ,
col= "black" ,
alpha = 1
) +
# Add the points on top of the error bars
geom_point (
position = pd,
size = 3 ,
col = "black" ,
fill = "orange" ,
show.legend = F,
pch = 21 ,
alpha = 0.9
) +
facet_wrap (
~ type,
nrow = 2 ,
scales = "free_x"
) +
scale_x_continuous (
n.breaks = 4
) +
theme_bw (25 ) +
# scale_color_manual(values=c("#F20505","#F5D736","#0FBF67")) +
theme (
legend.position = "none"
)